{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Secure SageMaker on AWS"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import boto3\n",
    "import sagemaker\n",
    "import pandas as pd\n",
    "\n",
    "sess   = sagemaker.Session()\n",
    "bucket = sess.default_bucket()\n",
    "role = sagemaker.get_execution_role()\n",
    "region = boto3.Session().region_name"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Internet Egress"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!curl https://aws.amazon.com/sagemaker/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Data Protection\n",
    "\n",
    "#### Check out the restrictive IAM policy attached to this user profile\n",
    "```\n",
    "{\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "            \"Action\": [\n",
    "                \"s3:GetObject\",\n",
    "                \"s3:PutObject\",\n",
    "                \"s3:ListObject\"\n",
    "            ],\n",
    "            \"Effect\": \"Allow\",\n",
    "            \"Resource\": [\n",
    "                \"arn:aws:s3:::sagemaker-us-east-1-835319576252-secure\",\n",
    "                \"arn:aws:s3:::sagemaker-us-east-1-835319576252-secure/*\"\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's try to copy data over to a different S3 bucket!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "s3://sagemaker-us-east-1-835319576252-secure\n"
     ]
    }
   ],
   "source": [
    "!echo s3://$bucket-secure/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp security.ipynb s3://$bucket-secure/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's try to copy data over to the allowed S3 bucket!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp ./security.ipynb s3://$bucket-secure/"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Let's test permissions again with a bucket policy and a VPC endpoint policy attached!\n",
    "\n",
    "### Bucket policy\n",
    "```\n",
    "{\n",
    "    \"Version\": \"2008-10-17\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "            \"Effect\": \"Deny\",\n",
    "            \"Principal\": \"*\",\n",
    "            \"Action\": [\n",
    "                \"s3:GetObject\",\n",
    "                \"s3:PutObject\",\n",
    "                \"s3:ListBucket\"\n",
    "            ],\n",
    "            \"Resource\": [\n",
    "                \"arn:aws:s3:::sagemaker-us-east-1-835319576252-secure\",\n",
    "                \"arn:aws:s3:::sagemaker-us-east-1-835319576252-secure/*\"\n",
    "            ],\n",
    "            \"Condition\": {\n",
    "                \"StringNotEquals\": {\n",
    "                    \"aws:sourceVpce\": \"vpce-<ADD_VPC_ID_HERE>\"\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "```"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### VPC endpoint policy: -- notice changed bucket name\n",
    "```\n",
    "{\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "            \"Effect\": \"Allow\",\n",
    "            \"Principal\": \"*\",\n",
    "            \"Action\": [\n",
    "                \"s3:GetObject\",\n",
    "                \"s3:PutObject\",\n",
    "                \"s3:ListBucket\"\n",
    "            ],\n",
    "            \"Resource\": [\n",
    "                \"arn:aws:s3:::sagemaker-us-east-1-835319576252-secure-diff-name\",\n",
    "                \"arn:aws:s3:::sagemaker-us-east-1-835319576252-secure-diff-name/*\"\n",
    "            ]\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "fatal error: An error occurred (NoSuchBucket) when calling the ListObjectsV2 operation: The specified bucket does not exist\n"
     ]
    }
   ],
   "source": [
    "# Run this cell in a terminal\n",
    "# TODO:  Not sure what this cell is supposed to do\n",
    "\n",
    "!aws s3 cp s3://$bucket-secure/ ."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train without VPC mode\n",
    "\n",
    "Let's kick off a training job without VPC mode enabled and see what happens"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Writing mnist.py\n"
     ]
    }
   ],
   "source": [
    "# %%writefile mnist.py\n",
    "\n",
    "# import tensorflow as tf\n",
    "# import argparse\n",
    "# import os\n",
    "# import numpy as np\n",
    "\n",
    "# def parse_args():\n",
    "    \n",
    "#     parser = argparse.ArgumentParser()\n",
    "#     parser.add_argument('--train', type=str, default=os.environ.get('SM_CHANNEL_TRAIN'))\n",
    "#     parser.add_argument('--test', type=str, default=os.environ.get('SM_CHANNEL_TEST'))\n",
    "    \n",
    "#     parser.add_argument('--model_dir', type=str, default=os.environ.get('SM_MODEL_DIR'))\n",
    "    \n",
    "#     return parser.parse_known_args()\n",
    "\n",
    "# def get_train_data(train_dir):\n",
    "    \n",
    "#     x_train = np.load(os.path.join(train_dir, 'x_train.npy'))\n",
    "#     y_train = np.load(os.path.join(train_dir, 'y_train.npy'))\n",
    "#     print('x train', x_train.shape,'y train', y_train.shape)\n",
    "\n",
    "#     return x_train, y_train\n",
    "\n",
    "\n",
    "# if __name__ == \"__main__\":\n",
    "    \n",
    "#     args, _ = parse_args()\n",
    "    \n",
    "#     x_train, y_train = get_train_data(args.train)\n",
    "    \n",
    "    \n",
    "#     model = tf.keras.models.Sequential([\n",
    "#             tf.keras.layers.Flatten(input_shape=(28, 28)),\n",
    "#             tf.keras.layers.Dense(128, activation='relu'),\n",
    "#             tf.keras.layers.Dropout(0.2),\n",
    "#             tf.keras.layers.Dense(10, activation='softmax')\n",
    "#     ])\n",
    "\n",
    "#     model.compile(optimizer='adam',\n",
    "#               loss='sparse_categorical_crossentropy',\n",
    "#               metrics=['accuracy'])\n",
    "\n",
    "#     model.fit(x_train, y_train, epochs=1)\n",
    "\n",
    "#     print(\"Training Complete\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!aws s3 cp x_train.npy s3://$bucket/train/\n",
    "!aws s3 cp y_train.npy s3://$bucket/train/"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sagemaker\n",
    "sess = sagemaker.Session()\n",
    "\n",
    "train_s3 = \"s3://{}/train/\".format(bucket)\n",
    "print(train_s3)\n",
    "inputs = {'train':train_s3}\n",
    "\n",
    "print(inputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Set IAM Policy to Allow Training Only In Specific VPCs/SecurityGroups/SubnetIds\n",
    "```\n",
    "{\n",
    "    \"Version\": \"2012-10-17\",\n",
    "    \"Statement\": [\n",
    "        {\n",
    "            \"Sid\": \"VPCDeployment\",\n",
    "            \"Effect\": \"Deny\",\n",
    "            \"Action\": [\n",
    "                \"sagemaker:CreateAutoMLJob\",\n",
    "                \"sagemaker:CreateTrainingJob\",\n",
    "                \"sagemaker:CreateProcessingJob\",\n",
    "                \"sagemaker:CreateModel\",\n",
    "                \"sagemaker:CreateHyperParameterTuningJob\"\n",
    "            ],\n",
    "            \"Resource\": \"*\",\n",
    "            \"Condition\": {\n",
    "                \"StringNotEquals\": {\n",
    "                    \"sagemaker:VpcSecurityGroupIds\": \"<ADD_SECURITY_GROUP_IDS_HERE>\",\n",
    "                    \"sagemaker:VpcSubnets\": [\n",
    "                        \"subnet-<ADD_SUBNET_IDS_HERE>\",\n",
    "                        \"subnet-<ADD_SUBNET_IDS_HERE>\"\n",
    "                    ]\n",
    "                }\n",
    "            }\n",
    "        }\n",
    "    ]\n",
    "}\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker import get_execution_role\n",
    "from sagemaker.tensorflow import TensorFlow\n",
    "\n",
    "role = get_execution_role()\n",
    "print(role)\n",
    "mnist_estimator = TensorFlow(entry_point='mnist.py',\n",
    "                             role=role,\n",
    "                             train_instance_count=1,\n",
    "                             train_instance_type='ml.m5.xlarge',\n",
    "                             framework_version='1.15.2',\n",
    "                             py_version='py3',\n",
    "                             output_path = 's3://{}/output/'.format(bucket))\n",
    "\n",
    "mnist_estimator.fit(inputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train with VPC mode\n",
    "\n",
    "Let's kick off a training job with VPC mode enabled and make sure that it goes through!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\u001b[31mERROR: Could not find a version that satisfies the requirement sagemaker_environment\u001b[0m\n",
      "\u001b[31mERROR: No matching distribution found for sagemaker_environment\u001b[0m\n"
     ]
    }
   ],
   "source": [
    "# !pip install sagemaker_environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "ename": "ModuleNotFoundError",
     "evalue": "No module named 'sagemaker_environment'",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-4-00fecce45936>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0;32mimport\u001b[0m \u001b[0msagemaker_environment\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m      2\u001b[0m \u001b[0msubnet\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msagemaker_environment\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSAGEMAKER_SUBNETS\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0msecurity_group_ids\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msagemaker_environment\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mSAGEMAKER_SECURITY_GROUPS\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[0mkey\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0msagemaker_environment\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mCMK_ID\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mkey\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'sagemaker_environment'"
     ]
    }
   ],
   "source": [
    "# import sagemaker_environment\n",
    "subnet = sagemaker_environment.SAGEMAKER_SUBNETS\n",
    "security_group_ids = sagemaker_environment.SAGEMAKER_SECURITY_GROUPS\n",
    "key = sagemaker_environment.CMK_ID\n",
    "print(key)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker import get_execution_role\n",
    "role = get_execution_role()\n",
    "print(role)\n",
    "mnist_estimator = TensorFlow(entry_point='mnist.py',\n",
    "                             role=role,\n",
    "                             train_instance_count=1,\n",
    "                             train_instance_type='ml.m5.xlarge',\n",
    "                             framework_version='1.15.2',\n",
    "                             py_version='py3',\n",
    "                             output_path='s3://{}/output/'.format(bucket),\n",
    "                             subnets=subnet,\n",
    "                             security_group_ids=security_group_ids,\n",
    "                             output_kms_key=key)\n",
    "\n",
    "mnist_estimator.fit(inputs)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train without encryption\n",
    "\n",
    "Let's kick off a training job without encryption and check if the SCP kicks in!"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sagemaker import get_execution_role\n",
    "role = get_execution_role()\n",
    "print(role)\n",
    "mnist_estimator = TensorFlow(entry_point='mnist.py',\n",
    "                             role=role,\n",
    "                             train_instance_count=1,\n",
    "                             train_instance_type='ml.m5.xlarge',\n",
    "                             framework_version='1.15.2',\n",
    "                             py_version='py3',\n",
    "                             output_path = 's3://{}/output/'.format(bucket),\n",
    "                             subnets = subnet,\n",
    "                             security_group_ids = security_group_ids)\n",
    "\n",
    "mnist_estimator.fit(inputs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "instance_type": "ml.t3.medium",
  "kernelspec": {
   "display_name": "conda_python3",
   "language": "python",
   "name": "conda_python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
